library(mosaic)
## Registered S3 method overwritten by 'mosaic':
## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Attache Paket: 'mosaic'
## Die folgenden Objekte sind maskiert von 'package:dplyr':
##
## count, do, tally
## Das folgende Objekt ist maskiert 'package:Matrix':
##
## mean
## Das folgende Objekt ist maskiert 'package:ggplot2':
##
## stat
## Die folgenden Objekte sind maskiert von 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
## quantile, sd, t.test, var
## Die folgenden Objekte sind maskiert von 'package:base':
##
## max, mean, min, prod, range, sample, sum
library(plotly)
##
## Attache Paket: 'plotly'
## Das folgende Objekt ist maskiert 'package:mosaic':
##
## do
## Das folgende Objekt ist maskiert 'package:ggplot2':
##
## last_plot
## Das folgende Objekt ist maskiert 'package:stats':
##
## filter
## Das folgende Objekt ist maskiert 'package:graphics':
##
## layout
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(dplyr)
library(rpart)
library(caret)
##
## Attache Paket: 'caret'
## Das folgende Objekt ist maskiert 'package:mosaic':
##
## dotPlot
library(psych)
##
## Attache Paket: 'psych'
## Die folgenden Objekte sind maskiert von 'package:mosaic':
##
## logit, rescale
## Die folgenden Objekte sind maskiert von 'package:ggplot2':
##
## %+%, alpha
library(ggplot2)
library(ggcorrplot)
library(rela)
# Load the four raw recordings, one CSV file per movement category.
# NOTE: the former `rm(list = ls())` was dropped on purpose. Clearing the
# global environment from inside a script destroys the caller's objects while
# leaving attached packages and options untouched; restart R for a clean run.
data_dir <- "03_Lunges_with_Dari"
read_idle <- read.csv(file.path(data_dir, "01_Idle.csv"))
idle_data <- data.frame(read_idle)
read_run <- read.csv(file.path(data_dir, "02_Running.csv"))
run_data <- data.frame(read_run)
read_lunge <- read.csv(file.path(data_dir, "03_Lunge.csv"))
lunge_data <- data.frame(read_lunge)
read_siu <- read.csv(file.path(data_dir, "04_Siu.csv"))
siu_data <- data.frame(read_siu)
Rename the first column of every table to "ID":
# Label the first column of each raw table "ID" so that all four tables share
# the same column name.
colnames(idle_data)[1L] <- "ID"
colnames(run_data)[1L] <- "ID"
colnames(lunge_data)[1L] <- "ID"
colnames(siu_data)[1L] <- "ID"
In total there are 8985 rows.
In the Lunges data, Orientation.X and Orientation.Z have the wrong data type.
We therefore cannot scale these columns before converting them to numeric.
# Stack the per-category tables one after another; the intermediate tables
# stay available under their own names.
idle_run <- rbind(idle_data, run_data)
irun_lunge <- rbind(idle_run, lunge_data)
motion_data <- rbind(irun_lunge, siu_data)
# Coerce Orientation.X of the idle + running subset to numeric and report the
# number of missing values in each of its columns.
idle_run[["Orientation.X"]] <- as.numeric(idle_run[["Orientation.X"]])
colSums(is.na(idle_run))
## ID Author Category
## 0 0 0
## Sample Acceleration.Timestamp Acceleration.X
## 0 0 0
## Acceleration.Y Acceleration.Z AngularVelocity.X
## 0 0 0
## AngularVelocity.Y AngularVelocity.Z MagneticField.X
## 0 0 1499
## MagneticField.Y MagneticField.Z Orientation.X
## 1499 1499 0
## Orientation.Y Orientation.Z
## 0 0
# Work on a copy of the merged table.
motion_data_all <- data.frame(motion_data)
# Drop the three magnetic-field columns, which contain many NA values.
magnetic_cols <- c("MagneticField.X", "MagneticField.Y", "MagneticField.Z")
motion_data_all <- motion_data_all[, !(names(motion_data_all) %in% magnetic_cols)]
# Cast the columns to the types used by the rest of the analysis.
motion_data_all$Category <- as.factor(motion_data_all$Category)
motion_data_all$Acceleration.X <- as.numeric(motion_data_all$Acceleration.X)
## Warning: NAs durch Umwandlung erzeugt
motion_data_all$Orientation.X <- as.numeric(motion_data_all$Orientation.X)
motion_data_all$Orientation.Z <- as.numeric(motion_data_all$Orientation.Z)
More NAs were found after the conversion:
# Number of missing values per column after the type conversions.
colSums(is.na(motion_data_all))
## ID Author Category
## 0 0 0
## Sample Acceleration.Timestamp Acceleration.X
## 0 0 1
## Acceleration.Y Acceleration.Z AngularVelocity.X
## 0 0 1
## AngularVelocity.Y AngularVelocity.Z Orientation.X
## 1 1 0
## Orientation.Y Orientation.Z
## 0 0
Remove the NA’s
About 8584 rows left
# Drop every row that contains at least one NA, then re-count the missing
# values per column to confirm that none are left.
motion_data_all <- na.omit(motion_data_all)
colSums(is.na(motion_data_all))
## ID Author Category
## 0 0 0
## Sample Acceleration.Timestamp Acceleration.X
## 0 0 0
## Acceleration.Y Acceleration.Z AngularVelocity.X
## 0 0 0
## AngularVelocity.Y AngularVelocity.Z Orientation.X
## 0 0 0
## Orientation.Y Orientation.Z
## 0 0
Scale the data:
# Build a plotting copy of the cleaned data, add the Euclidean magnitude of
# the acceleration and angular-velocity vectors, and z-score all quantitative
# columns.
motion_data_plot <- data.frame(motion_data_all)
motion_data_plot$acceleration <- sqrt(
  motion_data_plot$Acceleration.X^2 +
    motion_data_plot$Acceleration.Y^2 +
    motion_data_plot$Acceleration.Z^2
)
motion_data_plot$angularVelocity <- sqrt(
  motion_data_plot$AngularVelocity.X^2 +
    motion_data_plot$AngularVelocity.Y^2 +
    motion_data_plot$AngularVelocity.Z^2
)
# Select columns by name instead of by position (formerly 6:16 and 2:3), so
# the split cannot silently pick the wrong columns if the table layout changes.
quant_cols <- c(
  "Acceleration.X", "Acceleration.Y", "Acceleration.Z",
  "AngularVelocity.X", "AngularVelocity.Y", "AngularVelocity.Z",
  "Orientation.X", "Orientation.Y", "Orientation.Z",
  "acceleration", "angularVelocity"
)
cat_cols <- c("Author", "Category")
quant_var <- dplyr::select(motion_data_plot, dplyr::all_of(quant_cols))
cat_var <- dplyr::select(motion_data_plot, dplyr::all_of(cat_cols))
# scale() centres each column and divides it by its standard deviation.
quant_var <- scale(quant_var)
motion_data_scale <- cbind(cat_var, quant_var)
motion_data_scale
Train with data from Ahmed, Tobias, Ronaldo, Regan and Darian; test with data from Saghar
#motion_data_train <- subset(motion_data_all, Author == "Ahmed" | Author == "Tobias" | Author == "Saghar" | Author == "Ronaldo") #+ subset(motion_data_all, Author == "Tobias")
#motion_data_unknown <- subset(motion_data, Author == "Regan" | Author == "Darian") # 33 %
# Leave-one-author-out split: five authors for training, Saghar for testing.
train_authors <- c("Ahmed", "Tobias", "Ronaldo", "Regan", "Darian")
motion_data_train <- subset(motion_data_all, Author %in% train_authors)
motion_data_test <- subset(motion_data_all, Author %in% "Saghar")
# Snapshot used for the descriptive statistics; taken from the plotting copy,
# so it still carries all columns plus the two magnitude columns.
motion_data_all_stat <- data.frame(motion_data_plot)
# Strip the bookkeeping columns from the modelling table.
bookkeeping_cols <- c("ID", "Acceleration.Timestamp", "Author", "Sample")
motion_data_all <- motion_data_all[, !(names(motion_data_all) %in% bookkeeping_cols)]
#motion_data_plot <- motion_data_plot[,!names(motion_data_plot) %in% c("ID", "Acceleration.Timestamp", "Author", "Sample")]
Write merged cleaned data to file:
# Persist the cleaned, merged table; row names are not written to the file.
write.csv(motion_data_all, "All Samples Clean.csv", row.names = FALSE)
Stacked bar chart:
Darian and Ahmed seem to have recorded more motion data than the others.
# Number of recorded rows per author and movement category.
# `.groups = "drop"` returns an ungrouped table, so later verbs do not
# inherit the Author grouping, and it silences the "grouped output" message.
cat_count <- motion_data_all_stat %>%
  dplyr::group_by(Author, Category) %>%
  dplyr::summarise(count = dplyr::n(), .groups = "drop")
## `summarise()` has grouped output by 'Author'. You can override using the
## `.groups` argument.
# Stacked bar chart: one bar per author, segments coloured by category, bar
# height taken directly from the pre-computed counts.
stack_bar <- ggplot(
  data = cat_count,
  mapping = aes(x = Author, y = count, fill = Category)
) +
  geom_bar(stat = "identity") #+
  #geom_text(aes(label = count), vjust = -4.5)
# Render the chart as an interactive plotly widget.
ggplotly(stack_bar)
# One density plot per author: distribution of the acceleration magnitude,
# filled by movement category.
categories <- c("Tobias", "Saghar", "Ronaldo", "Regan", "Ahmed", "Darian")
for (i in categories) {
  # Rows recorded by the current author only.
  test <- motion_data_all_stat[which(motion_data_all_stat$Author == i), ]
  plot <- ggplot(data = test, mapping = aes(x = acceleration, fill = Category)) +
    #geom_histogram(bins=(sqrt(length(cat_count$Category))),fill="white",color="black",aes(y=..density..)) +
    geom_density(alpha = .3) +
    facet_grid(. ~ Author)
  #scale_x_continuous(limits=c(-2.5, 8), expand=c(0,0))
  # Inside a loop a ggplot object must be printed explicitly to be drawn.
  print(plot)
}
# One density plot per author, restricted to the three non-idle movements.
categories <- c("Tobias", "Saghar", "Ronaldo", "Regan", "Ahmed", "Darian")
# Set membership must be tested with %in%. The former
# `Category == c("Lunges", "Running", "Siu")` recycled the three labels along
# the rows, so each row was compared with only one of them: most matching rows
# were dropped and R warned about mismatched object lengths.
active_categories <- c("Lunges", "Running", "Siu")
for (i in categories) {
  test <- subset(
    motion_data_all_stat,
    Author == i & Category %in% active_categories
  )
  plot <- ggplot(test, aes(x = acceleration, fill = Category)) +
    #geom_histogram(bins=(sqrt(length(cat_count$Category))),fill="white",color="black",aes(y=..density..)) +
    geom_density(alpha = .3) +
    facet_grid(. ~ Author)
  #scale_x_continuous(limits=c(-2.5, 8), expand=c(0,0))
  # Inside a loop a ggplot object must be printed explicitly to be drawn.
  print(plot)
}
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in is.na(e1) | is.na(e2): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in is.na(e1) | is.na(e2): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
## ist kein Vielfaches der Länge des kürzeren Objektes
Threshold: 0.2
Old one: Remaining features: Acceleration.X, Acceleration.Z, Orientation.X, Orientation.Y, Orientation.Z
New one: Remaining features: Acceleration.X, Acceleration.Y, Acceleration.Z, AngularVelocity.X, AngularVelocity.Y, AngularVelocity.Z
We remove the orientation, since everyone had a different phone position
# Numeric-only view of the cleaned data: a copy of the modelling table without
# the class label, used as input for the correlation matrix.
motion_data_all_numeric <- data.frame(motion_data_all)
keep_numeric <- !(names(motion_data_all_numeric) %in% "Category")
motion_data_all_numeric <- motion_data_all_numeric[, keep_numeric]
#Was for only for testing -> Darian: Everyone has different position of phone, thats why we should skip Orientation
#motion_data_all_numeric <- motion_data_all_numeric[,!names(motion_data_all_numeric) %in% c("Orientation.X", "Orientation.Y", "Orientation.Z")]
#motion_data_all_numeric$Category <- as.numeric(factor(motion_data_all_numeric$Category))
#motion_data_all_numeric$Category <- as.factor(motion_data_all_numeric$Category)
motion_data_all_numeric
# Pairwise correlations between all numeric sensor columns.
cor_matrix <- cor(motion_data_all_numeric)
# Lower-triangle circle plot with hierarchically ordered variables and the
# coefficient printed in each cell.
plot <- ggcorrplot(
  cor_matrix,
  method = "circle",
  type = "lower",
  hc.order = TRUE,
  lab = TRUE,
  lab_size = 3
)
# Interactive version of the correlation plot.
ggplotly(plot)
## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]: Anzahl
## der zu ersetzenden Elemente ist kein Vielfaches der Ersetzungslänge
## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]: Anzahl
## der zu ersetzenden Elemente ist kein Vielfaches der Ersetzungslänge
## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]: Anzahl
## der zu ersetzenden Elemente ist kein Vielfaches der Ersetzungslänge
## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]: Anzahl
## der zu ersetzenden Elemente ist kein Vielfaches der Ersetzungslänge
## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]: Anzahl
## der zu ersetzenden Elemente ist kein Vielfaches der Ersetzungslänge
We use only relevant columns for the model training - So only numeric ones and the category
# Columns that must not reach the model: bookkeeping fields plus the
# orientation channels.
remove_col <- c("ID", "Acceleration.Timestamp", "Author", "Sample", "Orientation.X", "Orientation.Y", "Orientation.Z")
motion_data_train <- motion_data_train[, !names(motion_data_train) %in% remove_col]
# Pair plot of the test author's (Saghar's) data with the same columns removed.
plot_test <- data.frame(motion_data_test)
plot_test <- plot_test[, !names(plot_test) %in% remove_col]
# NOTE: despite its name, this table holds the numeric columns of the *test*
# data (it is derived from plot_test). The name is kept for compatibility.
motion_data_train_numeric <- data.frame(plot_test)
motion_data_train_numeric <- motion_data_train_numeric[, !names(motion_data_train_numeric) %in% c("Category")]
#idle_tobias <- subset(motion_data_tobias[1:5], Category == "Idle")
# Map the colour to the Category column of the plotted data frame itself rather
# than to an external vector, and restrict the panels to the numeric columns.
# The title previously contained a typo and a stray accent character.
plot <- ggpairs(
  data = plot_test,
  columns = names(motion_data_train_numeric),
  mapping = aes(color = Category),
  title = "Motion pair plot with quantitative variables for Saghar",
  upper = list(
    continuous = wrap("cor", size = 2.75)
  )
)
plot
# Pair plot of the two magnitude features for the test author (Saghar).
saghar_stat <- subset(motion_data_all_stat, Author == "Saghar")
corr_data <- select(saghar_stat, c("acceleration", "angularVelocity"))
#idle_tobias <- subset(motion_data_tobias[1:5], Category == "Idle")
# The colour is taken from the Category column of the same rows that are
# plotted. Previously it came from a different data frame (plot_test) and was
# only correct as long as both tables had identical row order and length.
plot <- ggpairs(
  data = saghar_stat,
  columns = c("acceleration", "angularVelocity"),
  mapping = aes(color = Category),
  title = "Motion pair plot with acceleration and angular velocity",
  upper = list(
    continuous = wrap("cor", size = 2.75)
  )
)
plot
# Box plots of the six scaled acceleration / angular-velocity channels.
motion_data_box <- select(motion_data_scale, c("Acceleration.X","Acceleration.Y","Acceleration.Z","AngularVelocity.X","AngularVelocity.Y","AngularVelocity.Z"))
# boxplot() is base graphics: adding a ggplot2 theme() with `+` does not
# restyle the drawn plot and the expression merely evaluates to NULL.
# Rotate the axis labels with `las` instead and widen the bottom margin so the
# long column names fit; the previous graphics parameters are restored after.
old_par <- par(mar = c(9, 4, 2, 1))
boxplot(motion_data_box, las = 2)
par(old_par)
## NULL
#geom_violin(trim = FALSE) +
#geom_boxplot()
#theme_minimal()
#### PAIR PLOT ####
# Fresh copy of the statistics table for the base-graphics plots.
motion_data_box <- data.frame(motion_data_all_stat)
# Show the six raw sensor channels (the selection is printed, not stored).
select(motion_data_box, "Acceleration.X", "Acceleration.Y", "Acceleration.Z", "AngularVelocity.X", "AngularVelocity.Y", "AngularVelocity.Z")
# Euclidean magnitude of each three-axis signal.
motion_data_box$Acceleration <- with(
  motion_data_box,
  sqrt(Acceleration.X^2 + Acceleration.Z^2 + Acceleration.Y^2)
)
motion_data_box$Orientation <- with(
  motion_data_box,
  sqrt(Orientation.X^2 + Orientation.Y^2 + Orientation.Z^2)
)
motion_data_box$AngularVelocity <- with(
  motion_data_box,
  sqrt(AngularVelocity.X^2 + AngularVelocity.Y^2 + AngularVelocity.Z^2)
)
# Scatter-plot matrix of the two magnitudes.
pairs(motion_data_box[, c("Acceleration", "AngularVelocity")])
#### CORRELATION PLOT ####
# corPlot() draws with base graphics, so adding a ggplot2 theme() with `+`
# cannot rotate its labels and the expression merely evaluates to NULL.
# Use corPlot's own `xlas` argument (2 = perpendicular to the axis) instead.
corPlot(
  motion_data_box %>% select("Acceleration.X", "Acceleration.Y", "Acceleration.Z", "AngularVelocity.X", "AngularVelocity.Y", "AngularVelocity.Z"),
  xlas = 2
)
## NULL
# Correlation plot of the two magnitude columns only.
corPlot(motion_data_box %>% select("Acceleration", "AngularVelocity"))
## Distribution of each movement added by the Author#### DENSITY PLOT #####
# geom_density() estimates the density of a continuous variable. The original
# mapped x to the categorical Author column, which inside each Author facet is
# a single constant value and therefore produced a meaningless spike.
# NOTE(review): the acceleration magnitude is used as the plotted quantity
# here - confirm that this is the intended variable.
ggplot(motion_data_box, aes(x = Acceleration, fill = Category)) +
  geom_density(alpha = 0.3) +
  ggtitle("Distribution of Movements") +
  theme(plot.title = element_text(hjust = 0.5)) +
  facet_wrap(~Author, ncol = 3) +
  theme(axis.text.x = element_text(angle = 90))
# geom_vline(data=mu, aes(xintercept=grp.mean, color=sex),
# linetype="dashed")
#### BOXPLOT ####
# AngularVelocity.X per author, one box per movement category; outliers are
# drawn as large red open circles and the title is centred.
ggplot(
  data = motion_data_box,
  mapping = aes(x = Author, y = AngularVelocity.X, fill = Category)
) +
  geom_boxplot(
    outlier.size = 4,
    outlier.shape = 1,
    outlier.colour = "red"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Boxplot of AngularVelocity.X w.r.t Authors")
#library(lubridate)
#density_data <- data.frame(motion_data_all_stat)
# convert character to POSIXct
#density_data$timestamp <- as.POSIXct(density_data$Acceleration.Timestamp/1000, origin="1970-01-01")
# extract hour and minute:
#density_data$time <- hms::hms(second(density_data$Acceleration.Timestamp), minute(density_data$Acceleration.Timestamp), hour(density_data$Acceleration.Timestamp))
# convert to POSIXct again since ggplot does not work with class hms.
#density_data$time <- as.POSIXct(density_data$time)
#density_data$date <-as.Date(as.POSIXct(density_data$Acceleration.Timestamp, origin="1970-01-01"))
#density_data
# Visualize the acceleration measurements over time
#plot(motion_data_all_test$Acceleration.Timestamp, motion_data_all_test$Acceleration.Y, type = "l")
#lines(motion_data_all_test$Acceleration.Timestamp, motion_data_all_test$Acceleration.Z, col = "red")
#legend("topright", c( "Acceleration Y", "Acceleration Z"), lty = 1, col = c("black", "red"))
#run_plot <- group_by(run_activity, Activity) %>%
## ggplot(aes(x=timestamp)) +
# labs( x = "Timestamp", y = "Acceleration") +
# geom_line(aes(y = X), color="dark green", alpha = 0.8) +
# geom_line(aes(y = Y), color="light blue", alpha = 0.8) +
# geom_line(aes(y = Z), color="dark orange", alpha = 0.8)
# Acceleration over time for one author and one activity.
remove_col <- c("ID", "Author", "Sample", "Orientation.X", "Orientation.Y", "Orientation.Z")
# Read from the cleaned statistics table instead of the raw `motion_data`.
# The raw table still holds Acceleration.X as read from the CSV (it needed
# as.numeric() and produced NAs during cleaning), which cannot be drawn on a
# continuous y axis. `motion_data_all` cannot be used either because its
# Author column has already been removed.
# NOTE: the object names say "idle" but the filter selects Tobias' "Running"
# rows; the names are kept unchanged for compatibility.
idle_activity <- subset(motion_data_all_stat, Category == "Running" & Author == "Tobias")
idle_activity <- idle_activity[, !names(idle_activity) %in% remove_col]
#test <- scale_x_datetime(breaks = date_breaks("1 hours"), labels=date_format("%H:%m"), expand = c(0,0))
#test
# One line per axis; grouping the data beforehand has no effect on the plot,
# so the former group_by() step was dropped.
idle_plot <- ggplot(idle_activity, aes(x = Acceleration.Timestamp)) +
  labs(x = "Timestamp", y = "Acceleration") +
  geom_line(aes(y = Acceleration.X), color = "dark green", alpha = 0.8) +
  geom_line(aes(y = Acceleration.Y), color = "light blue", alpha = 0.8) +
  geom_line(aes(y = Acceleration.Z), color = "dark orange", alpha = 0.8)
#ggplotly(idle_plot)
Train split: 80 %, Test split: 20 %
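The selected features correlate well and are relevant. Note that the models below are nevertheless trained on all six acceleration and angular-velocity channels (the model summaries report 6 predictors), so the angular velocity is not actually skipped.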
# Fix the RNG so the partition is reproducible.
set.seed(10)
# All columns of the training authors' table are used as features.
feature_selection <- motion_data_train#[,c("Category", "Acceleration.X", "Acceleration.Y", "Acceleration.Z")]
# Partition on Category: 80 % of the rows for fitting, the rest for validation.
train_index_all <- createDataPartition(
  y = feature_selection$Category,
  p = 0.80,
  list = FALSE
)
train_data_all <- feature_selection[train_index_all, ]
test_data_all <- feature_selection[-train_index_all, ]
# Seed for reproducible resampling and forest construction.
set.seed(6)
# 6: 89.8 %
# 4-fold cross-validation is used to tune the model.
control_par <- trainControl(method = "cv", number = 4)
# Random forest predicting Category from all remaining columns.
model_rf_all <- train(
  Category ~ .,
  data = train_data_all,
  method = "rf",
  trControl = control_par
)
model_rf_all
## Random Forest
##
## 6153 samples
## 6 predictor
## 4 classes: 'Idle', 'Lunges', 'Running', 'Siu'
##
## No pre-processing
## Resampling: Cross-Validated (4 fold)
## Summary of sample sizes: 4614, 4615, 4615, 4615
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8090375 0.7439799
## 4 0.8039982 0.7372037
## 6 0.7970094 0.7278494
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
Random forest with cross validation 4 fold
# Confusion matrix of the random forest built from its cross-validation
# resamples (see the printed header of the result).
cm_train_data <- confusionMatrix(model_rf_all)
cm_train_data
## Cross-Validated (4 fold) Confusion Matrix
##
## (entries are percentual average cell counts across resamples)
##
## Reference
## Prediction Idle Lunges Running Siu
## Idle 23.9 0.7 1.2 0.4
## Lunges 0.8 16.3 1.0 2.3
## Running 1.9 1.7 23.3 5.2
## Siu 0.5 1.4 2.0 17.3
##
## Accuracy (average) : 0.809
set.seed(6)
# Predict the held-out 20 % split with the random forest.
rf_all_pred_test <- predict(model_rf_all, newdata = test_data_all)
# Share of correctly classified rows, in percent.
accuracy_rf_test <- 100 * mean(rf_all_pred_test == test_data_all$Category)
accuracy_rf_test
## [1] 82.48698
# Full confusion matrix and per-class statistics for the same predictions.
cm_test_data <- confusionMatrix(
  data = rf_all_pred_test,
  reference = test_data_all$Category
)
cm_test_data
## Confusion Matrix and Statistics
##
## Reference
## Prediction Idle Lunges Running Siu
## Idle 369 9 13 5
## Lunges 14 253 12 41
## Running 23 29 371 68
## Siu 9 19 27 274
##
## Overall Statistics
##
## Accuracy : 0.8249
## 95% CI : (0.8049, 0.8436)
## No Information Rate : 0.2754
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7653
##
## Mcnemar's Test P-Value : 1.219e-06
##
## Statistics by Class:
##
## Class: Idle Class: Lunges Class: Running Class: Siu
## Sensitivity 0.8892 0.8161 0.8771 0.7062
## Specificity 0.9759 0.9454 0.8922 0.9521
## Pos Pred Value 0.9318 0.7906 0.7556 0.8328
## Neg Pred Value 0.9596 0.9531 0.9502 0.9056
## Prevalence 0.2702 0.2018 0.2754 0.2526
## Detection Rate 0.2402 0.1647 0.2415 0.1784
## Detection Prevalence 0.2578 0.2083 0.3197 0.2142
## Balanced Accuracy 0.9325 0.8807 0.8846 0.8291
# Long-format copy of the confusion table (Prediction, Reference, Freq).
plt <- as.data.frame(cm_test_data$table)
# Reverse the order of the prediction levels for the x axis.
plt$Prediction <- factor(plt$Prediction, levels = rev(levels(plt$Prediction)))
# Heat map of the cell counts with the count printed in every tile.
rf_conf_mat <- ggplot(
  data = plt,
  mapping = aes(x = Prediction, y = Reference, fill = Freq)
) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradient(low = "white", high = "#009194") +
  labs(x = "Prediction", y = "Reference") +
  scale_y_discrete(labels = c("Idle", "Lunges", "Running", "Siu")) +
  scale_x_discrete(labels = c("Siu", "Running", "Lunges", "Idle"))
ggplotly(rf_conf_mat)
# Prepare the test author's table for the per-sample evaluation: keep Sample,
# drop the other bookkeeping fields and the orientation channels.
remove_col <- c("ID", "Acceleration.Timestamp", "Author", "Orientation.X", "Orientation.Y", "Orientation.Z")
keep_test_cols <- !(names(motion_data_test) %in% remove_col)
motion_data_test <- motion_data_test[, keep_test_cols]
# Recode the sample identifiers as consecutive numbers 1..k.
motion_data_test$Sample <- as.numeric(as.factor(motion_data_test$Sample))
# Categories present in the test data.
unique(motion_data_test$Category)
## [1] Idle Running Lunges Siu
## Levels: Idle Lunges Running Siu
Sample numbers per category: Idle: 1 - 10, Lunges: 11 - 20, Running: 21 - 30, Siu: 31 - 40
# Summary of every column of the prepared test table.
inspect(motion_data_test)
##
## categorical variables:
## name class levels n missing
## 1 Category factor 4 1420 0
## distribution
## 1 Running (47.6%), Lunges (22.1%) ...
##
## quantitative variables:
## name class min Q1 median Q3 max
## 1 Sample numeric 1.00000 13.0000000 22.000000 27.0000000 40.00000
## 2 Acceleration.X numeric -19.24533 5.6348475 8.989855 10.3458525 74.95678
## 3 Acceleration.Y numeric -62.43217 -2.5990100 -1.087490 -0.0446425 10.99254
## 4 Acceleration.Z numeric -27.55201 -1.3236225 1.985545 4.8890625 40.44529
## 5 AngularVelocity.X numeric -7.90234 -0.5865550 -0.007295 0.5926225 12.85294
## 6 AngularVelocity.Y numeric -7.73286 -0.2825200 0.029700 0.3547175 10.01106
## 7 AngularVelocity.Z numeric -12.65705 -0.2874675 -0.006055 0.2679275 7.92185
## mean sd n missing
## 1 20.17323944 9.586790 1420 0
## 2 8.94612076 8.124970 1420 0
## 3 -2.02695663 5.929216 1420 0
## 4 1.79015740 5.362013 1420 0
## 5 0.03390892 1.698911 1420 0
## 6 0.05902677 1.120474 1420 0
## 7 -0.03541651 1.196526 1420 0
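Results of the random forest per sample (majority vote over the rows of a sample), as printed below:
Idle: 6 / 10 samples are classified correctly; the other 4 are mistaken for Lunges
Lunges: 10 / 10 samples are classified correctly
Running: 10 / 10 samples are classified correctly
Siu: 9 / 10 samples are classified correctly; 1 is mistaken for Running
In total the average row-level accuracy per sample is about 68 %, and 87.5 % of the samples are classified correctly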
# Per-sample evaluation of the random forest on the test author.
# For every sample, all of its rows are classified; the row-level accuracy is
# accumulated in total_accuracy, and the sample counts as correct when the most
# frequently predicted class equals its reference label.
total_accuracy <- 0
sample_accuracy <- 0
correct_samples_rf <- 0
# Iterate over the sample ids that actually occur instead of 1:length(...),
# which is wrong for an empty table and assumes ids without gaps.
sample_ids <- sort(unique(motion_data_test$Sample))
# One labelled data frame per sample. The former append() call flattened each
# data frame into its individual columns and grew the list on every iteration.
list_motion_data_unknown <- vector("list", length(sample_ids))
for (idx in seq_along(sample_ids)) {
  i <- sample_ids[idx]
  motion_data_unknown <- subset(motion_data_test, Sample == i)
  ref <- motion_data_unknown$Category
  motion_data_unknown <- motion_data_unknown[, !names(motion_data_unknown) %in% c("Sample")]
  # Copy of the sample with the true label blanked out before prediction.
  motion_data_no_labels <- data.frame(motion_data_unknown)
  motion_data_no_labels$Category <- ""
  set.seed(6)
  # Generate predictions
  rf_Lunges_pred_new <- predict(object = model_rf_all, newdata = motion_data_no_labels)
  # Row-level accuracy of this sample in percent
  accuracy <- mean(rf_Lunges_pred_new == motion_data_unknown$Category) * 100
  total_accuracy <- total_accuracy + accuracy
  motion_data_no_labels$Category <- rf_Lunges_pred_new
  # Majority vote over the predicted classes. This replaces the confusion
  # matrix of the predictions against themselves, which was only used to find
  # the most frequent prediction.
  predicted_class <- names(which.max(table(rf_Lunges_pred_new)))
  reference_class <- as.character(unique(ref))
  # A sample is expected to carry exactly one label; guard the scalar `if`.
  if (length(reference_class) == 1L && reference_class == predicted_class) {
    correct_samples_rf <- correct_samples_rf + 1
  }
  print(paste("Reference: ", paste(reference_class, collapse = "/"), "Prediction: ", predicted_class, "Accuracy: ", accuracy, sep = " "))
  list_motion_data_unknown[[idx]] <- motion_data_no_labels
}
## [1] "Reference: Idle Prediction: Lunges Accuracy: 45.4545454545455"
## [1] "Reference: Idle Prediction: Idle Accuracy: 66.6666666666667"
## [1] "Reference: Idle Prediction: Lunges Accuracy: 14.2857142857143"
## [1] "Reference: Idle Prediction: Idle Accuracy: 72"
## [1] "Reference: Idle Prediction: Idle Accuracy: 70"
## [1] "Reference: Idle Prediction: Idle Accuracy: 60"
## [1] "Reference: Idle Prediction: Lunges Accuracy: 45.1612903225806"
## [1] "Reference: Idle Prediction: Idle Accuracy: 61.2903225806452"
## [1] "Reference: Idle Prediction: Lunges Accuracy: 11.1111111111111"
## [1] "Reference: Idle Prediction: Idle Accuracy: 74.0740740740741"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 93.75"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 84.375"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 81.25"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 90.625"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 84.375"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 84.375"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 87.5"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 84.375"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 81.25"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 73.0769230769231"
## [1] "Reference: Running Prediction: Running Accuracy: 58.7301587301587"
## [1] "Reference: Running Prediction: Running Accuracy: 76.1194029850746"
## [1] "Reference: Running Prediction: Running Accuracy: 72.0588235294118"
## [1] "Reference: Running Prediction: Running Accuracy: 77.1084337349398"
## [1] "Reference: Running Prediction: Running Accuracy: 74.6666666666667"
## [1] "Reference: Running Prediction: Running Accuracy: 76.4705882352941"
## [1] "Reference: Running Prediction: Running Accuracy: 77.9411764705882"
## [1] "Reference: Running Prediction: Running Accuracy: 70.3125"
## [1] "Reference: Running Prediction: Running Accuracy: 66.1290322580645"
## [1] "Reference: Running Prediction: Running Accuracy: 58.6206896551724"
## [1] "Reference: Siu Prediction: Siu Accuracy: 55"
## [1] "Reference: Siu Prediction: Siu Accuracy: 65"
## [1] "Reference: Siu Prediction: Siu Accuracy: 76.4705882352941"
## [1] "Reference: Siu Prediction: Siu Accuracy: 73.3333333333333"
## [1] "Reference: Siu Prediction: Siu Accuracy: 81.25"
## [1] "Reference: Siu Prediction: Running Accuracy: 0"
## [1] "Reference: Siu Prediction: Siu Accuracy: 73.3333333333333"
## [1] "Reference: Siu Prediction: Siu Accuracy: 76.9230769230769"
## [1] "Reference: Siu Prediction: Siu Accuracy: 56.25"
## [1] "Reference: Siu Prediction: Siu Accuracy: 83.3333333333333"
# Number of rows recorded by the test author in the merged raw table.
saghar_row_count <- nrow(subset(motion_data, Author == "Saghar"))
print(paste("Amount of rows for all samples in total: ", saghar_row_count))
## [1] "Amount of rows for all samples in total: 1420"
# Number of distinct samples in the prepared test table.
test_sample_count <- length(unique(motion_data_test$Sample))
print(paste("Amount of samples in total: ", test_sample_count))
## [1] "Amount of samples in total: 40"
# Mean of the per-sample row-level accuracies (already in percent).
sample_accuracy_rf <- total_accuracy / test_sample_count
print(paste("Accuracy over all samples: ", sample_accuracy_rf))
## [1] "Accuracy over all samples: 67.8511696249001"
# Share of samples whose majority prediction matched the reference label.
avg_acc_rf <- correct_samples_rf / test_sample_count
print(paste("Accuracy of correct samples: ", avg_acc_rf * 100))
## [1] "Accuracy of correct samples: 87.5"
# Seed for reproducible cross-validation folds.
set.seed(6)
# 6: 89.8 %
# 4-fold cross-validation is used to tune the model.
control_par <- trainControl(method = "cv", number = 4)
# k-nearest-neighbours classifier for Category, tuned on accuracy.
model_knn <- train(
  Category ~ .,
  data = train_data_all,
  method = "knn",
  metric = "Accuracy",
  trControl = control_par
)
model_knn
## k-Nearest Neighbors
##
## 6153 samples
## 6 predictor
## 4 classes: 'Idle', 'Lunges', 'Running', 'Siu'
##
## No pre-processing
## Resampling: Cross-Validated (4 fold)
## Summary of sample sizes: 4614, 4615, 4615, 4615
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.7664556 0.6875408
## 7 0.7617434 0.6812002
## 9 0.7609308 0.6800328
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 5.
KNN with cross validation 4 fold
# Confusion matrix of the KNN model built from its cross-validation resamples
# (see the printed header of the result). Overwrites the earlier cm_train_data.
cm_train_data <- confusionMatrix(model_knn)
cm_train_data
## Cross-Validated (4 fold) Confusion Matrix
##
## (entries are percentual average cell counts across resamples)
##
## Reference
## Prediction Idle Lunges Running Siu
## Idle 24.2 1.6 3.2 1.6
## Lunges 0.7 16.3 2.0 3.4
## Running 1.5 1.1 19.8 3.9
## Siu 0.6 1.2 2.5 16.4
##
## Accuracy (average) : 0.7665
set.seed(6)
# Predict the held-out 20 % split with the KNN model.
knn_all_pred_test <- predict(model_knn, newdata = test_data_all)
# Share of correctly classified rows, in percent.
accuracy_knn_test <- 100 * mean(knn_all_pred_test == test_data_all$Category)
accuracy_knn_test
## [1] 78.38542
# Full confusion matrix and per-class statistics for the same predictions.
cm_test_data <- confusionMatrix(
  data = knn_all_pred_test,
  reference = test_data_all$Category
)
cm_test_data
## Confusion Matrix and Statistics
##
## Reference
## Prediction Idle Lunges Running Siu
## Idle 376 28 30 21
## Lunges 12 235 31 49
## Running 14 19 325 50
## Siu 13 28 37 268
##
## Overall Statistics
##
## Accuracy : 0.7839
## 95% CI : (0.7624, 0.8042)
## No Information Rate : 0.2754
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7106
##
## Mcnemar's Test P-Value : 0.0003963
##
## Statistics by Class:
##
## Class: Idle Class: Lunges Class: Running Class: Siu
## Sensitivity 0.9060 0.7581 0.7683 0.6907
## Specificity 0.9295 0.9250 0.9254 0.9321
## Pos Pred Value 0.8264 0.7187 0.7966 0.7746
## Neg Pred Value 0.9639 0.9380 0.9131 0.8992
## Prevalence 0.2702 0.2018 0.2754 0.2526
## Detection Rate 0.2448 0.1530 0.2116 0.1745
## Detection Prevalence 0.2962 0.2129 0.2656 0.2253
## Balanced Accuracy 0.9178 0.8415 0.8469 0.8114
# Long-format copy of the KNN confusion table (Prediction, Reference, Freq).
plt <- as.data.frame(cm_test_data$table)
# Reverse the order of the prediction levels for the x axis.
plt$Prediction <- factor(plt$Prediction, levels = rev(levels(plt$Prediction)))
# Heat map of the cell counts. The object keeps the name rf_conf_mat although
# it now shows the KNN results.
rf_conf_mat <- ggplot(
  data = plt,
  mapping = aes(x = Prediction, y = Reference, fill = Freq)
) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradient(low = "white", high = "#009194") +
  labs(x = "Prediction", y = "Reference") +
  scale_y_discrete(labels = c("Idle", "Lunges", "Running", "Siu")) +
  scale_x_discrete(labels = c("Siu", "Running", "Lunges", "Idle"))
ggplotly(rf_conf_mat)
# Same preparation of the test author's table as before the random-forest
# evaluation: keep Sample, drop the other bookkeeping and orientation columns.
remove_col <- c("ID", "Acceleration.Timestamp", "Author", "Orientation.X", "Orientation.Y", "Orientation.Z")
keep_test_cols <- !(names(motion_data_test) %in% remove_col)
motion_data_test <- motion_data_test[, keep_test_cols]
# Recode the sample identifiers as consecutive numbers 1..k.
motion_data_test$Sample <- as.numeric(as.factor(motion_data_test$Sample))
# Categories present in the test data.
unique(motion_data_test$Category)
## [1] Idle Running Lunges Siu
## Levels: Idle Lunges Running Siu
Sample numbers per category: Idle: 1 - 10, Lunges: 11 - 20, Running: 21 - 30, Siu: 31 - 40
# Summary of every column of the prepared test table.
inspect(motion_data_test)
##
## categorical variables:
## name class levels n missing
## 1 Category factor 4 1420 0
## distribution
## 1 Running (47.6%), Lunges (22.1%) ...
##
## quantitative variables:
## name class min Q1 median Q3 max
## 1 Sample numeric 1.00000 13.0000000 22.000000 27.0000000 40.00000
## 2 Acceleration.X numeric -19.24533 5.6348475 8.989855 10.3458525 74.95678
## 3 Acceleration.Y numeric -62.43217 -2.5990100 -1.087490 -0.0446425 10.99254
## 4 Acceleration.Z numeric -27.55201 -1.3236225 1.985545 4.8890625 40.44529
## 5 AngularVelocity.X numeric -7.90234 -0.5865550 -0.007295 0.5926225 12.85294
## 6 AngularVelocity.Y numeric -7.73286 -0.2825200 0.029700 0.3547175 10.01106
## 7 AngularVelocity.Z numeric -12.65705 -0.2874675 -0.006055 0.2679275 7.92185
## mean sd n missing
## 1 20.17323944 9.586790 1420 0
## 2 8.94612076 8.124970 1420 0
## 3 -2.02695663 5.929216 1420 0
## 4 1.79015740 5.362013 1420 0
## 5 0.03390892 1.698911 1420 0
## 6 0.05902677 1.120474 1420 0
## 7 -0.03541651 1.196526 1420 0
Lunges is not recognized at all: 10/10 are missclassified
Idle: 10 / 10 Samples with at least 70 % correct
Running: 10 / 10 Samples with at least 60 % correct
Siu: 9 / 10 Samples with at least 50 % correct
In total we have an avg accuracy of 60 %
# Per-sample evaluation of the KNN model on the test author.
# For every sample, all of its rows are classified; the row-level accuracy is
# accumulated in total_accuracy, and the sample counts as correct when the most
# frequently predicted class equals its reference label.
total_accuracy <- 0
sample_accuracy <- 0
correct_samples_knn <- 0
# Iterate over the sample ids that actually occur instead of 1:length(...),
# which is wrong for an empty table and assumes ids without gaps.
sample_ids <- sort(unique(motion_data_test$Sample))
for (i in sample_ids) {
  motion_data_unknown <- subset(motion_data_test, Sample == i)
  ref <- motion_data_unknown$Category
  motion_data_unknown <- motion_data_unknown[, !names(motion_data_unknown) %in% c("Sample")]
  # Copy of the sample with the true label blanked out before prediction.
  motion_data_no_labels <- data.frame(motion_data_unknown)
  motion_data_no_labels$Category <- ""
  # Re-seed before predicting, as the original did, so repeated runs give the
  # same predictions.
  set.seed(6)
  # Generate predictions
  knn_pred_new <- predict(object = model_knn, newdata = motion_data_no_labels)
  # Row-level accuracy of this sample in percent
  accuracy <- mean(knn_pred_new == motion_data_unknown$Category) * 100
  total_accuracy <- total_accuracy + accuracy
  motion_data_no_labels$Category <- knn_pred_new
  # Majority vote over the predicted classes. This replaces the confusion
  # matrix of the predictions against themselves, which was only used to find
  # the most frequent prediction.
  predicted_class <- names(which.max(table(knn_pred_new)))
  reference_class <- as.character(unique(ref))
  # A sample is expected to carry exactly one label; guard the scalar `if`.
  if (length(reference_class) == 1L && reference_class == predicted_class) {
    correct_samples_knn <- correct_samples_knn + 1
  }
  print(paste("Reference: ", paste(reference_class, collapse = "/"), "Prediction: ", predicted_class, "Accuracy: ", accuracy, sep = " "))
}
## [1] "Reference: Idle Prediction: Idle Accuracy: 63.6363636363636"
## [1] "Reference: Idle Prediction: Idle Accuracy: 93.3333333333333"
## [1] "Reference: Idle Prediction: Lunges Accuracy: 33.3333333333333"
## [1] "Reference: Idle Prediction: Idle Accuracy: 92"
## [1] "Reference: Idle Prediction: Idle Accuracy: 93.3333333333333"
## [1] "Reference: Idle Prediction: Idle Accuracy: 73.3333333333333"
## [1] "Reference: Idle Prediction: Idle Accuracy: 87.0967741935484"
## [1] "Reference: Idle Prediction: Idle Accuracy: 83.8709677419355"
## [1] "Reference: Idle Prediction: Idle Accuracy: 92.5925925925926"
## [1] "Reference: Idle Prediction: Idle Accuracy: 96.2962962962963"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 78.125"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 71.875"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 65.625"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 75"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 75"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 68.75"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 81.25"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 53.125"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 78.125"
## [1] "Reference: Lunges Prediction: Lunges Accuracy: 73.0769230769231"
## [1] "Reference: Running Prediction: Running Accuracy: 60.3174603174603"
## [1] "Reference: Running Prediction: Running Accuracy: 68.6567164179104"
## [1] "Reference: Running Prediction: Running Accuracy: 76.4705882352941"
## [1] "Reference: Running Prediction: Running Accuracy: 72.289156626506"
## [1] "Reference: Running Prediction: Running Accuracy: 78.6666666666667"
## [1] "Reference: Running Prediction: Running Accuracy: 69.1176470588235"
## [1] "Reference: Running Prediction: Running Accuracy: 67.6470588235294"
## [1] "Reference: Running Prediction: Running Accuracy: 59.375"
## [1] "Reference: Running Prediction: Running Accuracy: 64.5161290322581"
## [1] "Reference: Running Prediction: Running Accuracy: 53.448275862069"
## [1] "Reference: Siu Prediction: Siu Accuracy: 50"
## [1] "Reference: Siu Prediction: Siu Accuracy: 50"
## [1] "Reference: Siu Prediction: Siu Accuracy: 64.7058823529412"
## [1] "Reference: Siu Prediction: Siu Accuracy: 40"
## [1] "Reference: Siu Prediction: Siu Accuracy: 56.25"
## [1] "Reference: Siu Prediction: Siu Accuracy: 100"
## [1] "Reference: Siu Prediction: Siu Accuracy: 53.3333333333333"
## [1] "Reference: Siu Prediction: Siu Accuracy: 69.2307692307692"
## [1] "Reference: Siu Prediction: Siu Accuracy: 43.75"
## [1] "Reference: Siu Prediction: Siu Accuracy: 66.6666666666667"
# Summary of the per-recording KNN evaluation.
print(paste(
  "Amount of rows for all samples in total: ",
  nrow(subset(motion_data, Author == "Saghar"))
))
## [1] "Amount of rows for all samples in total: 1420"
n_test_samples <- length(unique(motion_data_test$Sample))
print(paste("Amount of samples in total: ", n_test_samples))
## [1] "Amount of samples in total: 40"
# Mean of the per-recording row accuracies (already expressed in percent).
sample_accuracy_knn <- total_accuracy / n_test_samples
print(paste("Accuracy over all samples: ", sample_accuracy_knn))
## [1] "Accuracy over all samples: 69.8304900373805"
# Fraction of recordings that were counted as correct in the loop above.
avg_acc_knn <- correct_samples_knn / n_test_samples
print(paste("Accuracy of correct samples: ", avg_acc_knn * 100))
## [1] "Accuracy of correct samples: 97.5"
# Fit a CART decision tree ("rpart") through caret, tuned with 4-fold
# cross-validation and selected on accuracy.
# NOTE(review): the original remark "6: 89.8 %" next to this seed does not
# match the accuracy reported for this model - presumably copied from another
# model's chunk; confirm.
set.seed(6)
control_par <- trainControl(method = "cv", number = 4)
model_rpart <- train(
  Category ~ .,
  data = train_data_all,
  method = "rpart",
  metric = "Accuracy",
  trControl = control_par
)
model_rpart
## CART
##
## 6153 samples
## 6 predictor
## 4 classes: 'Idle', 'Lunges', 'Running', 'Siu'
##
## No pre-processing
## Resampling: Cross-Validated (4 fold)
## Summary of sample sizes: 4614, 4615, 4615, 4615
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa
## 0.09147982 0.5527355 0.3906444
## 0.10448430 0.4700199 0.2788876
## 0.21771300 0.3526860 0.1124208
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.09147982.
# Basic plot of the final decision tree selected by caret.
# `branch` is a numeric shape parameter between 0 and 1; pass 1 explicitly
# instead of the reassignable shorthand `T` (which only worked by coercion).
plot(model_rpart$finalModel, branch = 1, margin = 0.1)
# Add the split rules and leaf labels to the plotted tree.
text(model_rpart$finalModel)
Rpart with 4-fold cross-validation
# Confusion matrix aggregated over the cross-validation resamples of the
# rpart model; "overall" normalisation is the default made explicit.
cm_train_data <- confusionMatrix(data = model_rpart, norm = "overall")
cm_train_data
## Cross-Validated (4 fold) Confusion Matrix
##
## (entries are percentual average cell counts across resamples)
##
## Reference
## Prediction Idle Lunges Running Siu
## Idle 23.4 5.7 8.2 3.6
## Lunges 0.0 0.0 0.0 0.0
## Running 2.7 4.7 14.7 4.6
## Siu 0.9 9.8 4.6 17.1
##
## Accuracy (average) : 0.5527
# Evaluate the tuned tree on the held-out test split.
set.seed(6)
rpart_all_pred_test <- predict(model_rpart, newdata = test_data_all)
# Percentage of test rows whose predicted class equals the true class.
accuracy_rpart_test <- 100 * mean(
  rpart_all_pred_test == test_data_all$Category
)
accuracy_rpart_test
## [1] 50.39062
# Confusion matrix and per-class statistics of the rpart test predictions.
cm_test_data <- confusionMatrix(
  data = rpart_all_pred_test,
  reference = test_data_all$Category
)
cm_test_data
## Confusion Matrix and Statistics
##
## Reference
## Prediction Idle Lunges Running Siu
## Idle 387 135 225 93
## Lunges 0 0 0 0
## Running 14 24 121 29
## Siu 14 151 77 266
##
## Overall Statistics
##
## Accuracy : 0.5039
## 95% CI : (0.4786, 0.5292)
## No Information Rate : 0.2754
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.325
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: Idle Class: Lunges Class: Running Class: Siu
## Sensitivity 0.9325 0.0000 0.28605 0.6856
## Specificity 0.5959 1.0000 0.93980 0.7892
## Pos Pred Value 0.4607 NaN 0.64362 0.5236
## Neg Pred Value 0.9598 0.7982 0.77596 0.8813
## Prevalence 0.2702 0.2018 0.27539 0.2526
## Detection Rate 0.2520 0.0000 0.07878 0.1732
## Detection Prevalence 0.5469 0.0000 0.12240 0.3307
## Balanced Accuracy 0.7642 0.5000 0.61293 0.7374
# Interactive heat map of the rpart test-set confusion matrix.
plt <- as.data.frame(cm_test_data$table)
# Reverse the Prediction levels so the x axis runs in the opposite order to
# the y axis; the matching cells then run from top-left to bottom-right.
plt$Prediction <- factor(plt$Prediction, levels = rev(levels(plt$Prediction)))
# Tick labels are taken from the factor levels themselves. The previous
# hard-coded label vectors duplicated those levels and would have silently
# mislabelled the axes if the class set or its order ever changed.
# The object name `rf_conf_mat` is kept from the original chunk although it
# holds the rpart plot here.
rf_conf_mat <- ggplot(plt, aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradient(low = "white", high = "#009194") +
  labs(x = "Prediction", y = "Reference")
ggplotly(rf_conf_mat)
# Columns to discard from the evaluation data before it is split per sample.
remove_col <- c(
  "ID", "Acceleration.Timestamp", "Author",
  "Orientation.X", "Orientation.Y", "Orientation.Z"
)
motion_data_test <- motion_data_test[
  , setdiff(names(motion_data_test), remove_col)
]
# Re-code the sample identifiers as consecutive numbers (factor codes).
motion_data_test[["Sample"]] <- as.numeric(
  as.factor(motion_data_test[["Sample"]])
)
# Show which classes occur in the evaluation data.
unique(motion_data_test[["Category"]])
## [1] Idle Running Lunges Siu
## Levels: Idle Lunges Running Siu
Idle: 1 - 10, Lunges: 11 - 20, Running: 21 - 30, Siu: 31 - 40
# Per-column overview (class, distribution, quantiles, missing values) of the
# evaluation data.
mosaic::inspect(motion_data_test)
##
## categorical variables:
## name class levels n missing
## 1 Category factor 4 1420 0
## distribution
## 1 Running (47.6%), Lunges (22.1%) ...
##
## quantitative variables:
## name class min Q1 median Q3 max
## 1 Sample numeric 1.00000 13.0000000 22.000000 27.0000000 40.00000
## 2 Acceleration.X numeric -19.24533 5.6348475 8.989855 10.3458525 74.95678
## 3 Acceleration.Y numeric -62.43217 -2.5990100 -1.087490 -0.0446425 10.99254
## 4 Acceleration.Z numeric -27.55201 -1.3236225 1.985545 4.8890625 40.44529
## 5 AngularVelocity.X numeric -7.90234 -0.5865550 -0.007295 0.5926225 12.85294
## 6 AngularVelocity.Y numeric -7.73286 -0.2825200 0.029700 0.3547175 10.01106
## 7 AngularVelocity.Z numeric -12.65705 -0.2874675 -0.006055 0.2679275 7.92185
## mean sd n missing
## 1 20.17323944 9.586790 1420 0
## 2 8.94612076 8.124970 1420 0
## 3 -2.02695663 5.929216 1420 0
## 4 1.79015740 5.362013 1420 0
## 5 0.03390892 1.698911 1420 0
## 6 0.05902677 1.120474 1420 0
## 7 -0.03541651 1.196526 1420 0
Lunges is not recognized at all: 10/10 are misclassified
Idle: 10 / 10 Samples with at least 70 % correct
Running: 10 / 10 Samples with at least 60 % correct
Siu: 9 / 10 Samples with at least 50 % correct
In total we have an avg accuracy of 60 %
# Per-recording evaluation of the rpart model on the held-out recordings.
# For every recording (identified by `Sample`) two figures are collected:
#   * row accuracy  - percentage of rows whose predicted class equals the
#                     recording's label (summed up in `total_accuracy`);
#   * majority vote - the class predicted most often for the recording; the
#                     recording counts as correct when the vote equals its
#                     label (counted in `correct_samples_rpart`).
total_accuracy <- 0
sample_accuracy <- 0 # not read in this chunk; kept in case other chunks use it
correct_samples_rpart <- 0

# Iterate over the sample ids that actually occur instead of assuming that
# they are exactly 1..n.
for (sample_id in sort(unique(motion_data_test$Sample))) {
  sample_rows <- subset(motion_data_test, Sample == sample_id)

  # Every recording is expected to carry exactly one label.
  true_label <- unique(as.character(sample_rows$Category))
  stopifnot(length(true_label) == 1L)

  # Hand the model the sensor columns only, so the label cannot leak in.
  feature_cols <- !names(sample_rows) %in% c("Sample", "Category")
  features <- sample_rows[, feature_cols, drop = FALSE]

  # Re-seed before every predict() call, as the original chunk did.
  set.seed(6)
  rpart_pred_new <- predict(object = model_rpart, newdata = features)

  # Row accuracy of this recording in percent.
  accuracy <- mean(as.character(rpart_pred_new) == true_label) * 100
  total_accuracy <- total_accuracy + accuracy

  # Majority vote; which.max() returns the first maximum, so ties go to the
  # class that comes first in the factor's level order.
  vote_counts <- table(rpart_pred_new)
  majority_class <- names(vote_counts)[which.max(vote_counts)]

  if (true_label == majority_class) {
    correct_samples_rpart <- correct_samples_rpart + 1
  }

  print(paste("Reference: ", true_label, "Prediction: ", majority_class,
              "Accuracy: ", accuracy, sep = " "))
}
## [1] "Reference: Idle Prediction: Idle Accuracy: 93.9393939393939"
## [1] "Reference: Idle Prediction: Idle Accuracy: 93.3333333333333"
## [1] "Reference: Idle Prediction: Idle Accuracy: 85.7142857142857"
## [1] "Reference: Idle Prediction: Idle Accuracy: 96"
## [1] "Reference: Idle Prediction: Idle Accuracy: 96.6666666666667"
## [1] "Reference: Idle Prediction: Idle Accuracy: 93.3333333333333"
## [1] "Reference: Idle Prediction: Idle Accuracy: 77.4193548387097"
## [1] "Reference: Idle Prediction: Idle Accuracy: 96.7741935483871"
## [1] "Reference: Idle Prediction: Idle Accuracy: 66.6666666666667"
## [1] "Reference: Idle Prediction: Idle Accuracy: 92.5925925925926"
## [1] "Reference: Lunges Prediction: Siu Accuracy: 0"
## [1] "Reference: Lunges Prediction: Siu Accuracy: 0"
## [1] "Reference: Lunges Prediction: Idle Accuracy: 0"
## [1] "Reference: Lunges Prediction: Idle Accuracy: 0"
## [1] "Reference: Lunges Prediction: Siu Accuracy: 0"
## [1] "Reference: Lunges Prediction: Idle Accuracy: 0"
## [1] "Reference: Lunges Prediction: Idle Accuracy: 0"
## [1] "Reference: Lunges Prediction: Idle Accuracy: 0"
## [1] "Reference: Lunges Prediction: Siu Accuracy: 0"
## [1] "Reference: Lunges Prediction: Idle Accuracy: 0"
## [1] "Reference: Running Prediction: Running Accuracy: 39.6825396825397"
## [1] "Reference: Running Prediction: Siu Accuracy: 23.8805970149254"
## [1] "Reference: Running Prediction: Siu Accuracy: 25"
## [1] "Reference: Running Prediction: Siu Accuracy: 28.9156626506024"
## [1] "Reference: Running Prediction: Siu Accuracy: 32"
## [1] "Reference: Running Prediction: Idle Accuracy: 32.3529411764706"
## [1] "Reference: Running Prediction: Siu Accuracy: 29.4117647058824"
## [1] "Reference: Running Prediction: Siu Accuracy: 34.375"
## [1] "Reference: Running Prediction: Siu Accuracy: 33.8709677419355"
## [1] "Reference: Running Prediction: Siu Accuracy: 31.0344827586207"
## [1] "Reference: Siu Prediction: Siu Accuracy: 55"
## [1] "Reference: Siu Prediction: Siu Accuracy: 65"
## [1] "Reference: Siu Prediction: Siu Accuracy: 82.3529411764706"
## [1] "Reference: Siu Prediction: Siu Accuracy: 80"
## [1] "Reference: Siu Prediction: Siu Accuracy: 93.75"
## [1] "Reference: Siu Prediction: Running Accuracy: 0"
## [1] "Reference: Siu Prediction: Siu Accuracy: 80"
## [1] "Reference: Siu Prediction: Siu Accuracy: 92.3076923076923"
## [1] "Reference: Siu Prediction: Siu Accuracy: 81.25"
## [1] "Reference: Siu Prediction: Siu Accuracy: 91.6666666666667"
# Summary of the per-recording rpart evaluation.
print(paste(
  "Amount of rows for all samples in total: ",
  nrow(subset(motion_data, Author == "Saghar"))
))
## [1] "Amount of rows for all samples in total: 1420"
n_test_samples <- length(unique(motion_data_test$Sample))
print(paste("Amount of samples in total: ", n_test_samples))
## [1] "Amount of samples in total: 40"
# Mean of the per-recording row accuracies (already expressed in percent).
sample_accuracy_rpart <- total_accuracy / n_test_samples
print(paste("Accuracy over all samples: ", sample_accuracy_rpart))
## [1] "Accuracy over all samples: 48.1072769128794"
# Fraction of recordings that were counted as correct in the loop above.
avg_acc_rpart <- correct_samples_rpart / n_test_samples
print(paste("Accuracy of correct samples: ", avg_acc_rpart * 100))
## [1] "Accuracy of correct samples: 50"
# Comparison table of the three classifiers.
#   Train                - resampled accuracy (percent) of the tuning setting
#                          that caret selected for each model
#   Test                 - mean per-recording row accuracy (percent) from the
#                          per-sample evaluation loops
#   Correct_Samples_Pred - recordings counted as correct in those loops
n_test_samples <- length(unique(motion_data_test$Sample))
rf <- paste(correct_samples_rf, " out of ", n_test_samples)
knn <- paste(correct_samples_knn, " out of ", n_test_samples)
rpart <- paste(correct_samples_rpart, " out of ", n_test_samples)

# Report the accuracy of the selected model only. Averaging
# `model$results$Accuracy` would mix in every rejected tuning candidate and
# understate the model that is actually used for prediction.
selected_accuracy <- vapply(
  list(model_rf_all, model_knn, model_rpart),
  function(model) round(getTrainPerf(model)$TrainAccuracy * 100, 1),
  numeric(1)
)

results_models <- data.frame(
  Model = c("Random_forest", "KNN", "Rpart"),
  Train = selected_accuracy,
  Test = c(sample_accuracy_rf, sample_accuracy_knn, sample_accuracy_rpart),
  "." = c("", "", ""),
  Correct_Samples_Pred = c(rf, knn, rpart)
)
results_models
# Recordings per category that the rpart model classified correctly by
# majority vote. The counts are transcribed from the per-sample evaluation
# output: Idle 10/10, Running 1/10, Lunge 0/10, Siu 9/10.
# The Running and Lunge counts were previously swapped (values were listed in
# factor-level order while the labels were not).
results_rpart <- data.frame(
  Category = c("Idle", "Running", "Lunge", "Siu"),
  Nr_Samples = c(10, 10, 10, 10),
  Correct_Pred = c(10, 1, 0, 9)
)
results_rpart
# Hand-entered count of correctly classified recordings per category for the
# random forest model (10 recordings per category).
results_randomforest <- data.frame(
  Category = c("Idle", "Running", "Lunge", "Siu"),
  Nr_Samples = rep(10, 4),
  Correct_Pred = c(6, 10, 10, 9)
)
results_randomforest
# Hand-entered count of correctly classified recordings per category for the
# KNN model (10 recordings per category).
results_knn <- data.frame(
  Category = c("Idle", "Running", "Lunge", "Siu"),
  Nr_Samples = rep(10, 4),
  Correct_Pred = c(9, 10, 10, 10)
)
results_knn